Let's load all the required packages and read the training CSV.

# ---- Load required packages ----
# library() stops with an error when a package is missing, whereas require()
# only returns FALSE and lets the script fail later with a less obvious
# "could not find function" message.
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(readr))
suppressPackageStartupMessages(library(ggmap))
suppressPackageStartupMessages(library(lubridate))

# Date on which the script is run.
currentDate <- Sys.Date()

# ---- Locate the input directory ----
# The project root is hard-coded; the CSV is read from its "Input" folder.
setwd("E:/R_Script")
filepath <- getwd()
setwd(file.path(filepath, "Input"))


# ---- Read the raw training data ----
train_2 <- read_csv("train.csv")

As the dataset is large, let's take a subset so that my computer can handle it easily.

# Draw a random sample of 100,000 rows from the full data set.
train <- train_2 %>% sample_n(100000)
# Show the structure of the sampled data.
str(train)
## Classes 'tbl_df' and 'data.frame':   100000 obs. of  9 variables:
##  $ Dates     : POSIXct, format: "2003-12-14 01:00:00" "2013-10-20 13:15:00" ...
##  $ Category  : chr  "LARCENY/THEFT" "NON-CRIMINAL" "ROBBERY" "STOLEN PROPERTY" ...
##  $ Descript  : chr  "PETTY THEFT FROM LOCKED AUTO" "AIDED CASE, MENTAL DISTURBED" "ROBBERY ON THE STREET, STRONGARM" "STOLEN PROPERTY, POSSESSION WITH KNOWLEDGE, RECEIVING" ...
##  $ DayOfWeek : chr  "Sunday" "Sunday" "Friday" "Wednesday" ...
##  $ PdDistrict: chr  "MISSION" "MISSION" "MISSION" "CENTRAL" ...
##  $ Resolution: chr  "NONE" "PSYCHOPATHIC CASE" "ARREST, BOOKED" "ARREST, BOOKED" ...
##  $ Address   : chr  "25TH ST / MISSION ST" "1200 Block of GUERRERO ST" "SOUTH VAN NESS AV / 17TH ST" "600 Block of SUTTER ST" ...
##  $ X         : num  -122 -122 -122 -122 -122 ...
##  $ Y         : num  37.8 37.8 37.8 37.8 37.8 ...

Let's add year, month, hour and week columns.

# Derive calendar features from the incident timestamp.
# Only parse Dates when it is not already a date-time: re-parsing a POSIXct
# column round-trips it through character, which is redundant and can turn
# midnight timestamps into NA when they are printed without a time part
# (NOTE(review): reported for newer R releases - confirm on the target setup).
if (!is.POSIXct(train$Dates)) {
  train$Dates <- ymd_hms(train$Dates)
}
train <- train %>%
  mutate(
    years = year(Dates),
    month = month(Dates),
    hour = hour(Dates),
    week = week(Dates)
  )

The data has many categories, so let's keep only the top 10 categories for analysis.

# Keep only the ten most frequent categories.
# head(10) returns fewer rows when fewer than ten categories exist, instead
# of padding the table with NA rows the way `[1:10, 1]` indexing would.
temp <- train %>%
  group_by(Category) %>%
  summarise(no_sum = n()) %>%
  arrange(desc(no_sum)) %>%
  head(10) %>%
  select(Category)

# Joining from the top-category table keeps only the rows of `train` whose
# Category is among those selected; `by` is explicit so the key is not guessed.
train <- left_join(temp, train, by = "Category")

# Drop records from 2015 (presumably an incomplete year - TODO confirm).
# `years` is numeric, so compare against a number rather than a string.
train <- train[which(train$years != 2015), ]

Let's plot the top 10 categories.

# Number of cases per category, drawn as a horizontal bar chart sorted by count.
temp <- train %>%
  group_by(Category) %>%
  summarise(no_sum = n())

# `fill` is set on the layer, not inside aes(): inside aes() the string "blue"
# is treated as a data value and the bars get the default palette colour.
# After coord_flip() the y aesthetic (the count) is drawn horizontally, so the
# y label describes the counts and the category axis is left unlabelled.
ggplot(temp, aes(x = reorder(Category, no_sum), y = no_sum)) +
  geom_bar(stat = "identity", fill = "blue") +
  coord_flip() +
  ylab("No of cases") +
  xlab("") +
  ggtitle("No of Case in Top 10 Category")

Let's see how the crime categories have changed each year.
# Plot size for notebook front-ends that honour the repr.* options.
options(repr.plot.width = 9, repr.plot.height = 3)

# Column of ones; summing it within a group gives that group's row count.
train$dummy <- 1
temp <- summarise(group_by(train, years, Category), no = sum(dummy))

# Stacked area chart of yearly case counts, one band per category, with a
# thin stacked line drawn on top of each band.
ggplot(
  temp,
  aes(
    x = factor(years),
    y = no,
    fill = Category,
    order = desc(no),
    colour = Category,
    group = Category
  )
) +
  geom_area(colour = NA, alpha = .7) +
  scale_fill_brewer(palette = "Paired") +
  geom_line(position = "stack", size = .2) +
  ylab('') +
  xlab('Years') +
  ggtitle('No of Case by year')

Let's plot the number of cases by weekday.

# Number of cases per weekday, drawn as a line with points.
train$dummy <- 1
temp <- train %>%
  group_by(DayOfWeek) %>%
  summarise(no = sum(dummy))

# DayOfWeek is a character column, so it would be plotted alphabetically
# (Friday, Monday, Saturday, ...). Fix the level order so the line connects
# the days in calendar order.
temp$DayOfWeek <- factor(
  temp$DayOfWeek,
  levels = c(
    "Monday", "Tuesday", "Wednesday", "Thursday",
    "Friday", "Saturday", "Sunday"
  )
)

# group = 1 joins all weekdays into a single line; y = 0 is included so the
# counts are shown relative to zero.
ggplot(temp, aes(x = DayOfWeek, y = no)) +
  geom_line(aes(colour = "", group = 1)) +
  geom_point() +
  ylab("") +
  xlab("") +
  ggtitle("No of Case by weekday") +
  theme(legend.position = "none") +
  expand_limits(y = 0)

Number of cases by hour of the day – to see which hours have the most cases.

# Number of cases per hour of the day, drawn as a line with points.
# n() counts the rows of each group directly, so this chunk does not rely on
# a helper column created elsewhere in the script.
temp <- train %>%
  group_by(hour) %>%
  summarise(no = n())

# The x axis shows the hour and the y axis the case count; the labels now say
# so (they previously read 'No' and 'Category').
ggplot(temp, aes(x = factor(hour), y = no)) +
  geom_line(aes(colour = "", group = 1)) +
  geom_point() +
  ylab("No of cases") +
  xlab("Hour") +
  ggtitle("No of Case by Hour") +
  theme(legend.position = "none") +
  expand_limits(y = 0)

Let's chart the number of cases by hour of the day for each weekday.

# Plot size for notebook front-ends that honour the repr.* options.
options(repr.plot.width = 9, repr.plot.height = 7)

# Number of cases per weekday and hour. The weekday is turned into a factor
# with calendar-ordered levels so the facets run Monday to Sunday rather than
# alphabetically.
temp <- train %>%
  group_by(DayOfWeek, hour) %>%
  summarise(no = n()) %>%
  ungroup() %>%
  mutate(
    DayOfWeek = factor(
      DayOfWeek,
      levels = c(
        "Monday", "Tuesday", "Wednesday", "Thursday",
        "Friday", "Saturday", "Sunday"
      )
    )
  )

# One panel per weekday, stacked vertically, each showing cases by hour.
ggplot(temp, aes(x = factor(hour), y = no)) +
  geom_line(aes(colour = "", group = 1)) +
  geom_point() +
  ylab("") +
  xlab("") +
  theme(legend.position = "none") +
  expand_limits(y = 0) +
  facet_wrap(~DayOfWeek, nrow = 7)

Let's plot longitude and latitude on a map of San Francisco.

# Fetch the road base map for San Francisco that the map plots draw on.
map <- get_map(
  location = "san francisco",
  zoom = 13,
  maptype = "roadmap"
)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=san+francisco&zoom=13&size=640x640&scale=2&maptype=roadmap&language=en-EN&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=san%20francisco&sensor=false

1. Let's make a point chart.

# Number of cases at each coordinate, per category. n() counts the rows of
# each group directly, so this chunk does not rely on a helper column created
# elsewhere in the script.
temp <- train %>%
  group_by(X, Y, Category) %>%
  summarise(no = n()) %>%
  ungroup()

# One map panel per category; point size reflects the number of cases.
# alpha is set on the layer, not inside aes(): inside aes() the constant 0.8
# is treated as data and rescaled by the alpha scale instead of being used
# as the transparency.
ggmap(map, extent = "device") +
  geom_point(
    data = temp,
    aes(x = X, y = Y, size = no, colour = factor(Category)),
    alpha = 0.8
  ) +
  theme(legend.position = "none") +
  facet_wrap(~ Category, ncol = 2)

2. Let's make a heat map.

# Plot size for notebook front-ends that honour the repr.* options.
options(repr.plot.width = 9, repr.plot.height = 24)

# Density "heat map" of incidents on the base map, one panel per category.
ggmap(map, extent = "device") +
  # Faint raw incident locations.
  geom_point(
    data = train,
    aes(x = X, y = Y),
    size = 0.1,
    alpha = 0.1
  ) +
  # 2-D density contour lines.
  geom_density2d(
    data = train,
    aes(x = X, y = Y),
    size = 0.3
  ) +
  # Filled density bands; both fill colour and opacity follow the level.
  stat_density2d(
    data = train,
    aes(x = X, y = Y, fill = ..level.., alpha = ..level..),
    size = 0.1,
    bins = 16,
    geom = "polygon"
  ) +
  scale_fill_gradient(low = "green", high = "red") +
  scale_alpha(range = c(0, 0.3), guide = FALSE) +
  facet_wrap(~ Category, ncol = 2)

Happy coding